{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "1a24eb5d",
   "metadata": {},
   "source": [
    "## ratings[~ratings.isna().T.any()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7774cf79",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d5231a34",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame({\n",
    "    \"userId\": [1, 2, 3, 4, 5, 6],\n",
    "    \"rating\": [3.0, 4.0, 5.0, 6.0, 7.0, 8.0]\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "35149cca",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  rating\n",
       "0       1     3.0\n",
       "1       2     4.0\n",
       "2       3     5.0\n",
       "3       4     6.0\n",
       "4       5     7.0\n",
       "5       6     8.0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "67b47ecd",
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\users\\yejx\\.conda\\envs\\yjx36\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n",
      "c:\\users\\yejx\\.conda\\envs\\yjx36\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  \n"
     ]
    }
   ],
   "source": [
    "df[\"rating\"][0] = np.nan\n",
    "df[\"rating\"][1] = np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "9ebe7d23",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  rating\n",
       "0       1     NaN\n",
       "1       2     NaN\n",
       "2       3     5.0\n",
       "3       4     6.0\n",
       "4       5     7.0\n",
       "5       6     8.0"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "b3ac9047",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  rating\n",
       "0   False    True\n",
       "1   False    True\n",
       "2   False   False\n",
       "3   False   False\n",
       "4   False   False\n",
       "5   False   False"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.isna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "41bf8ba4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>userId</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>rating</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            0      1      2      3      4      5\n",
       "userId  False  False  False  False  False  False\n",
       "rating   True   True  False  False  False  False"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.isna().T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "cc2eafad",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     True\n",
       "1     True\n",
       "2    False\n",
       "3    False\n",
       "4    False\n",
       "5    False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.isna().T.any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "7288d7de",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  rating\n",
       "0       1     NaN\n",
       "1       2     NaN"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.isna().T.any()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "cfcfa3ef",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  rating\n",
       "2       3     5.0\n",
       "3       4     6.0\n",
       "4       5     7.0\n",
       "5       6     8.0"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[~df.isna().T.any()]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3a567844",
   "metadata": {},
   "source": [
    "## pd.concat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "03070470",
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = pd.DataFrame({\n",
    "    \"a\": [1, 2],\n",
    "    \"b\": [\"x\", \"y\"]\n",
    "})\n",
    "df2 = pd.DataFrame({\n",
    "    \"a\": [3, 4],\n",
    "    \"b\": [\"n\", \"m\"]\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "3e8d5a72",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>x</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>y</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b\n",
       "0  1  x\n",
       "1  2  y"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "e88081a2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4</td>\n",
       "      <td>m</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b\n",
       "0  3  n\n",
       "1  4  m"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "a4f06b05",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>x</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>y</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4</td>\n",
       "      <td>m</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b\n",
       "0  1  x\n",
       "1  2  y\n",
       "0  3  n\n",
       "1  4  m"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df1, df2], axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "e7f18c85",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>y</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>x</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4</td>\n",
       "      <td>m</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b\n",
       "0  3  n\n",
       "1  2  y\n",
       "0  1  x\n",
       "1  4  m"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df1, df2], axis=0).sample(frac=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e4f31ec6",
   "metadata": {},
   "source": [
    "## 词云可视化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "96a37b70",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    今天 中午 吃 什么\n",
       "1    今天 早上 没 吃饭\n",
       "Name: text, dtype: object"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3 = pd.DataFrame({\n",
    "    \"text\": [\"今天 中午 吃 什么\", \"今天 早上 没 吃饭\"]\n",
    "})\n",
    "df3[\"text\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "b80512de",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['今天 中午 吃 什么', '今天 早上 没 吃饭']"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3[\"text\"].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "3fbfb9a9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'今天 中午 吃 什么 今天 早上 没 吃饭'"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\" \".join(df3[\"text\"].tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "960ee345",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['今天', '中午', '吃', '什么', '今天', '早上', '没', '吃饭']"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\" \".join(df3[\"text\"].tolist()).split()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "a51e4782",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'今天': 2, '中午': 1, '吃': 1, '什么': 1, '早上': 1, '没': 1, '吃饭': 1}"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "word_list = \" \".join(df3[\"text\"].tolist()).split()\n",
    "fre_dict = {}\n",
    "for one in word_list:\n",
    "    if one in fre_dict:\n",
    "        fre_dict[one] += 1\n",
    "    else:\n",
    "        fre_dict[one] = 1\n",
    "fre_dict"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "81b17f49",
   "metadata": {},
   "source": [
    "## 雷达图"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "1b1f0a9b",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_record = pd.DataFrame({\n",
    "    \"env\": [1.0, 2.0, 1.0, 2.0, 1.0, 3.0],\n",
    "    \"flavor\": [4.0, 1.0, 2.0, 3.0, 3.0, 3.0],\n",
    "    \"service\": [3.0, 1.0, 1.0, 1.0, 1.0, 10]\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "ac503c7d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>env</th>\n",
       "      <th>flavor</th>\n",
       "      <th>service</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   env  flavor  service\n",
       "0  1.0     4.0      3.0\n",
       "1  2.0     1.0      1.0\n",
       "2  1.0     2.0      1.0\n",
       "3  2.0     3.0      1.0\n",
       "4  1.0     3.0      1.0\n",
       "5  3.0     3.0     10.0"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_record"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "d803cce7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0    3\n",
       "2.0    2\n",
       "3.0    1\n",
       "Name: env, dtype: int64"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env_column = df_record[\"env\"]\n",
    "env = env_column.value_counts()\n",
    "env"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "3d8fd0f1",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([1.0, 2.0, 3.0], [3, 2, 1])"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env.index.values.tolist(), env.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "330d5146",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1.0, 3)\n",
      "(2.0, 2)\n",
      "(3.0, 1)\n"
     ]
    }
   ],
   "source": [
    "for i in zip(env.index.values.tolist(), env.tolist()):\n",
    "    print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "f4e92b28",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1.0: 3, 2.0: 2, 3.0: 1}"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env_dict = dict(zip(env.index.values.tolist(), env.tolist()))\n",
    "env_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "a6617c01",
   "metadata": {},
   "outputs": [],
   "source": [
    "env_dict_final = {1.0:0, 2.0:0, 3.0:0, 4.0:0, 5.0:0}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "e0740a3e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1.0: 3, 2.0: 2, 3.0: 1, 4.0: 0, 5.0: 0}"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env_dict_final.update(env_dict)\n",
    "env_dict_final"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "ed4819cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "results = [env_dict_final]\n",
    "sorce = [[v for v in result.values()] for result in results]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "b26e5803",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[3, 2, 1, 0, 0]]"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sorce"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "6083d9a9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1, 2, 3, 1, 3, 4]"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = [1,2,3]\n",
    "b = [1,3,4]\n",
    "c = [*a, *b]\n",
    "c"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "af9e75b1",
   "metadata": {},
   "source": [
    "## 分组聚合"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "6b31e410",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>env</th>\n",
       "      <th>flavor</th>\n",
       "      <th>service</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  env  flavor  service\n",
       "0   1  1.0     2.0      5.0\n",
       "1   2  2.0     3.0      5.0\n",
       "2   3  3.0     3.0      3.0\n",
       "3   1  4.0     4.0      4.0\n",
       "4   2  1.0     1.0      1.0\n",
       "5   3  2.0     2.0      2.0"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    \"id\": [1, 2, 3, 1, 2, 3],\n",
    "    \"env\": [1.0, 2.0, 3.0, 4.0, 1.0, 2.0],\n",
    "    \"flavor\": [2.0, 3.0, 3.0, 4.0, 1.0, 2.0],\n",
    "    \"service\": [5.0, 5.0, 3.0, 4.0, 1.0, 2.0]\n",
    "})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "70ac0310",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001FC1A58A5F8>"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(\"id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "8d8b5629",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df_g = df.groupby(\"id\").agg([np.mean, np.sum])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "f6dc9124",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">env</th>\n",
       "      <th colspan=\"2\" halign=\"left\">flavor</th>\n",
       "      <th colspan=\"2\" halign=\"left\">service</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.5</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    env      flavor      service     \n",
       "   mean  sum   mean  sum    mean  sum\n",
       "id                                   \n",
       "2   1.5  3.0    2.0  4.0     3.0  6.0\n",
       "1   2.5  5.0    3.0  6.0     4.5  9.0\n",
       "3   2.5  5.0    2.5  5.0     2.5  5.0"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_sort = df_g.sort_values([(\"env\",\"mean\"),(\"env\",\"sum\")], ascending=[True, False])\n",
    "df_sort"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "9e2e6119",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2011-03-16 13:22:00'"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import time\n",
    "t = 1300252920000\n",
    "tformat = time.strftime(\"%Y-%m-%d %H:%M:%S\", time.localtime(t/1000))\n",
    "tformat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "3e0efa6f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2011-03'"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tformat[:7]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "59f3e284",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
